Setup

Load needed libraries

library(tidyverse)
library(phyloseq)
library(vegan)
library(ggplot2)
library(ggtree)
library(gplots)

Results

Alpha Diversity

# This object has day 0 and amendement samples removed and is rarefied to 6000
physeq <- readRDS("data/IncPhyseqRareClusteredTree")
plot_richness(physeq)

shannon <- plot_richness(physeq, measures = "Shannon")
shannon.df <- shannon$data
# load summary function
source("Functions/summarySE.R")
summary.shannon.df <- summarySE(shannon.df, measurevar = "value", groupvars = c("treatment", "day"))
pd <- position_dodge(0.2) # move them .05 to the left and right
ggplot(summary.shannon.df, aes(x=day, y=value, colour=treatment)) + 
    geom_errorbar(aes(ymin=value-se, ymax=value+se), width=.1, position=pd) +
    geom_point(position=pd) + 
    ggtitle("Shannon diversity")

Beta Diversity

Now let’s plot an PCA and NMDS ordination of the weighted unifrac distances, which uses the phylogenetic tree and considers the phylogenetic relationship between OTUs when calculating the distance matrix

PCA

PCoA <- ordinate(physeq, "PCoA", "wunifrac")
plot_ordination(physeq, PCoA, color = "day") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = day))

plot_ordination(physeq, PCoA, color = "treatment") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = treatment))

NMDS

NMDS <- ordinate(physeq, "NMDS", "wunifrac")
## Run 0 stress 0.1203498 
## Run 1 stress 0.1282782 
## Run 2 stress 0.1282787 
## Run 3 stress 0.1282788 
## Run 4 stress 0.1282792 
## Run 5 stress 0.1282787 
## Run 6 stress 0.1282838 
## Run 7 stress 0.1203496 
## ... New best solution
## ... Procrustes: rmse 0.0001782697  max resid 0.002449554 
## ... Similar to previous best
## Run 8 stress 0.1203504 
## ... Procrustes: rmse 0.0002520013  max resid 0.003154697 
## ... Similar to previous best
## Run 9 stress 0.120349 
## ... New best solution
## ... Procrustes: rmse 0.0002276444  max resid 0.002415201 
## ... Similar to previous best
## Run 10 stress 0.1282826 
## Run 11 stress 0.1203494 
## ... Procrustes: rmse 0.0001223545  max resid 0.001863789 
## ... Similar to previous best
## Run 12 stress 0.1203495 
## ... Procrustes: rmse 0.0002267954  max resid 0.002410693 
## ... Similar to previous best
## Run 13 stress 0.1203494 
## ... Procrustes: rmse 0.0002374282  max resid 0.002480501 
## ... Similar to previous best
## Run 14 stress 0.1282779 
## Run 15 stress 0.1203503 
## ... Procrustes: rmse 0.0001948203  max resid 0.001983618 
## ... Similar to previous best
## Run 16 stress 0.1203495 
## ... Procrustes: rmse 0.0002267268  max resid 0.002409881 
## ... Similar to previous best
## Run 17 stress 0.1203487 
## ... New best solution
## ... Procrustes: rmse 0.0001775879  max resid 0.002425884 
## ... Similar to previous best
## Run 18 stress 0.1203503 
## ... Procrustes: rmse 0.0002290085  max resid 0.002714327 
## ... Similar to previous best
## Run 19 stress 0.1203493 
## ... Procrustes: rmse 0.0002148478  max resid 0.002407078 
## ... Similar to previous best
## Run 20 stress 0.1203503 
## ... Procrustes: rmse 0.0002171407  max resid 0.002462118 
## ... Similar to previous best
## *** Solution reached
plot_ordination(physeq, NMDS, color = "day") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = day))

plot_ordination(physeq, NMDS, color = "treatment") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = treatment))

Aliens

Table with # of aliens for each treatment and what period of the incubation they were detected, early/late or throughout.

The lists of alien OTUs detected in amended microcosms was generated with , we can load those lists along with the phyloseq object with clustering information from :

alf.aliens <- readRDS("data/alf.aliens.rds")
comp.aliens <- readRDS("data/comp.aliens.rds")
mix.aliens <- readRDS("data/mix.aliens.rds")
# Read in the phylsoeq object from the clustering script
alf <- prune_samples(sample_data(physeq)$treatment %in% c("Alfalfa"), physeq) %>%
  filter_taxa(function(x) sum(x) > 1, T)
early.alf <- prune_samples(sample_data(alf)$response.group %in% c("early"), alf) %>%
  filter_taxa(function(x) sum(x) > 1, T)
late.alf <- prune_samples(sample_data(alf)$response.group %in% c("late"), alf) %>%
  filter_taxa(function(x) sum(x) > 1, T)

number.aliens.early.alf <- prune_taxa(as.character(alf.aliens), early.alf) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

number.aliens.late.alf <- prune_taxa(as.character(alf.aliens), late.alf) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

number.aliens.total.alf <- prune_taxa(as.character(alf.aliens), alf) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

# print("Number of alfalfa alien OTUs detected in all alfalfa")
# number.aliens.total.alf
# print("Total number of OTUs detected in all alfalfa")
# nrow(otu_table(alf))
# print("Number of alfalfa alien OTUs detected in early alfalfa")
# number.aliens.early.alf
# print("Total number of OTUs detected in early alfalfa")
# nrow(otu_table(early.alf))
# print("Number of alfalfa alien OTUs detected in late alfalfa")
# number.aliens.late.alf
# print("Total number of OTUs detected in late alfalfa")
# nrow(otu_table(late.alf))
comp <- prune_samples(sample_data(physeq)$treatment %in% c("Compost"), physeq) %>%
  filter_taxa(function(x) sum(x) > 1, T)
early.comp <- prune_samples(sample_data(comp)$response.group %in% c("early"), comp) %>%
  filter_taxa(function(x) sum(x) > 1, T)
late.comp <- prune_samples(sample_data(comp)$response.group %in% c("late"), comp) %>%
  filter_taxa(function(x) sum(x) > 1, T)

number.aliens.early.comp <- prune_taxa(as.character(comp.aliens), early.comp) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

number.aliens.late.comp <- prune_taxa(as.character(comp.aliens), late.comp) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

number.aliens.total.comp <- prune_taxa(as.character(comp.aliens), comp) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

# print("Number of Compost alien OTUs detected in all Compost")
# number.aliens.total.comp
# print("Total number of OTUs detected in all Compost")
# nrow(otu_table(comp))
# print("Number of Compost alien OTUs detected in early Compost")
# number.aliens.early.comp
# print("Total number of OTUs detected in early Compost")
# nrow(otu_table(early.comp))
# print("Number of Compost alien OTUs detected in late Compost")
# number.aliens.late.comp
# print("Total number of OTUs detected in late Compost")
# nrow(otu_table(late.comp))
mix <- prune_samples(sample_data(physeq)$treatment %in% c("Mix"), physeq) %>%
  filter_taxa(function(x) sum(x) > 1, T)
early.mix <- prune_samples(sample_data(mix)$response.group %in% c("early"), mix) %>%
  filter_taxa(function(x) sum(x) > 1, T)
late.mix <- prune_samples(sample_data(mix)$response.group %in% c("late"), mix) %>%
  filter_taxa(function(x) sum(x) > 1, T)

number.aliens.early.mix <- prune_taxa(as.character(mix.aliens), early.mix) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

number.aliens.late.mix <- prune_taxa(as.character(mix.aliens), late.mix) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

number.aliens.total.mix <- prune_taxa(as.character(mix.aliens), mix) %>%
  filter_taxa(function(x) sum(x) > 1, T) %>%
  otu_table() %>%
  nrow()

# print("Number of Mix alien OTUs detected in all Mix")
# number.aliens.total.mix
# print("Total number of OTUs detected in all Mix")
# nrow(otu_table(mix))
# print("Number of Mix alien OTUs detected in early Mix")
# number.aliens.early.mix
# print("Total number of OTUs detected in early Mix")
# nrow(otu_table(early.mix))
# print("Number of Mix alien OTUs detected in late Mix")
# number.aliens.late.mix
# print("Total number of OTUs detected in late Mix")
# nrow(otu_table(late.mix))
library(knitr)
a <- c("Alfalfa", number.aliens.total.alf, number.aliens.early.alf, number.aliens.late.alf)
b <- c("Compost", number.aliens.total.comp, number.aliens.early.comp, number.aliens.late.comp)
c <- c("Mix", number.aliens.total.mix, number.aliens.early.mix, number.aliens.late.mix)
x <- as.data.frame(rbind(a,b,c))
colnames(x) <- c("Treatment", "Throughout", "Eary", "Late")
rownames(x) <- NULL
kable(x, caption = "Number of OTUs considered aliens detected in each treatment")
Number of OTUs considered aliens detected in each treatment
Treatment Throughout Eary Late
Alfalfa 26 25 4
Compost 74 60 28
Mix 58 38 20

Alien Heatmaps

Function to get a list of OTUs from a sample type

GetOTUs <- function(physeq, samples) {
    prune_samples(sample_data(physeq)$treatment %in% c(samples), physeq) %>%
    filter_taxa(function(x) sum(x) > 1, T) %>%
    tax_table() %>%
    row.names()
}

Using function to generate list of OTUs from all starting soil, incubated soils and amendments

physeq.raw <- readRDS("data/RDS/incubation_physeq_Aug18.RDS")
# Incubated samples:
Alfalfa.otus <- GetOTUs(physeq.raw, c("Alfalfa"))
Compost.otus <- GetOTUs(physeq.raw, c("Compost"))
CompAlfa.otus <- GetOTUs(physeq.raw, c("CompAlfa"))
Control.otus <- GetOTUs(physeq.raw, c("Control"))
# Amendment samples:
AlfalfaAmend.otus <- GetOTUs(physeq.raw, c("AlfalfaAmend"))
CompostAmend.otus <- GetOTUs(physeq.raw, c("CompostAmend"))
# Soil sample:
AlfalfaSoil.otus <- GetOTUs(physeq.raw, c("AlfalfaSoil"))
GetAlienHeatMap <- function(physeq, control_otus, alien_otus, recieving_otus, samples){
  otus <- list(alien_otus, control_otus) 
  print("Looking for aliens between amendment and control soil")
  venn <- venn(otus)
  alf.aliens <- attr(venn, "intersections")$A
  aliens <- list(alf.aliens, recieving_otus)
  print("Detecting aliens in amended soil")
  aliens.venn <- venn(aliens)
  aliens.detected <- attr(aliens.venn,"intersections")$`A:B`
  incubated <- prune_samples(sample_data(physeq)$treatment %in% c(samples), physeq) %>%
    filter_taxa(function(x) sum(x) > 5, T) %>%
    transform_sample_counts(function(x) x / sum(x)) 
  incubated.aliens <- prune_taxa(aliens.detected, incubated)
  sample.order <- as.data.frame(sample_data(incubated.aliens)) %>%
    arrange(day, replication) %>%
    select(i_id) %>%
    remove_rownames()   
  alien.heatmap <- plot_heatmap(incubated.aliens, sample.label = "day", taxa.order= "Phylum", taxa.label = "Genus",
                                  sample.order = as.character(sample.order$i_id), 
                                  low = "#66CCFF", high = "#000033", na.value = "white")
  alien.heatmap
}
alf.heatmap <- GetAlienHeatMap(physeq, Control.otus, AlfalfaAmend.otus, Alfalfa.otus, c("Alfalfa"))
## [1] "Looking for aliens between amendment and control soil"

## [1] "Detecting aliens in amended soil"

alf.heatmap

comp.heatmap <- GetAlienHeatMap(physeq, Control.otus, CompostAmend.otus, Compost.otus, c("Compost"))
## [1] "Looking for aliens between amendment and control soil"

## [1] "Detecting aliens in amended soil"

comp.heatmap

I didn’t extract DNA from a mixed sample of compost and alfalfa, to get the list of potential aliens, we will combine the two lists from alfalfa and compost.

mix <- c(AlfalfaAmend.otus, CompostAmend.otus)
mix.heatmap <- GetAlienHeatMap(physeq, Control.otus, mix, CompAlfa.otus, c("Mix"))
## [1] "Looking for aliens between amendment and control soil"

## [1] "Detecting aliens in amended soil"

mix.heatmap

The relative abundance of the alien OTUs through the incubation is relatively low, but different between treatments. Could low abundance OTUs be drivers in this situation? We should compare the relative abundance of the dominant OTUs from each treatment.

Let’s make another heatmap, this one will have a list of OTUs from a treatment that is composed of the top ten OTUs by relative abundance from each day.

Day_top10 <- function(physeq, trt, days){
  trt <- prune_samples(sample_data(physeq)$treatment %in% c(trt), physeq)
  
  d0 <- subset_samples(trt, day == days[1]) 
  l0 <- names(sort(taxa_sums(d0), TRUE)[1:10])
  
  d7 <- subset_samples(trt, day == days[2]) 
  l7 <- names(sort(taxa_sums(d7), TRUE)[1:10])   
  
  d14 <- subset_samples(trt, day == days[3]) 
  l14 <- names(sort(taxa_sums(d14), TRUE)[1:10])   
  
  d21 <- subset_samples(trt, day == days[4]) 
  l21 <- names(sort(taxa_sums(d21), TRUE)[1:10])
  
  d35 <- subset_samples(trt, day == days[5]) 
  l35 <- names(sort(taxa_sums(d35), TRUE)[1:10])
  
  d49 <- subset_samples(trt, day == days[6]) 
  l49 <- names(sort(taxa_sums(d49), TRUE)[1:10])
  
  d97 <- subset_samples(trt, day == days[7])
  l97 <- names(sort(taxa_sums(d97), TRUE)[1:10])
  list <- c(l0, l7, l14, l21, l35, l49, l97)
  list
  phy <- prune_taxa(list, trt) %>%
      filter_taxa(function(x) sum(x) > 5, T) %>%
      transform_sample_counts(function(x) x / sum(x))
  sample.order <- as.data.frame(sample_data(phy)) %>%
    arrange(day, replication) %>%
    select(i_id) %>%
    remove_rownames()   
  heatmap <- plot_heatmap(phy, sample.label = "day", taxa.order= "Phylum", taxa.label = "Genus",
                                  sample.order = as.character(sample.order$i_id), 
                                  low = "#66CCFF", high = "#000033", na.value = "white")
  heatmap
}

Not rarefied for these heatmaps:

days <- c("0", "7", "14", "21", "35", "49", "97")
alf <- Day_top10(physeq.raw, c("Alfalfa"), days)
alf

Compost:

comp <- Day_top10(physeq.raw, c("Compost"), days)
comp

Mix:

mix <- Day_top10(physeq.raw, c("CompAlfa"), days)
mix

What are the common top OTUs from the three treatments?

venn(list("Alfalfa" = levels(alf$data$OTU), "Compost" = levels(comp$data$OTU), "Mix" = levels(mix$data$OTU)))